library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the generation table from the local archive folder into `energy`.
energy <- read_csv(file = "archive/organised_Gen.csv")
## New names:
## Rows: 496774 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): STATE, TYPE OF PRODUCER, ENERGY SOURCE dbl (4): ...1, YEAR, MONTH,
## GENERATION (Megawatthours)
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Assign short names to the seven columns by position, then discard the
# first column (the unnamed one, labelled "ID" here).
names(energy) <- c(
  "ID", "year", "month", "state", "producer", "source", "generation"
)
energy <- select(energy, -ID)
# Generation against year for the rows whose producer is
# "Total Electric Power Industry" and whose state is "US-TOTAL".
# The aesthetics are declared once on the plot and inherited by both layers,
# so points and smoothers are coloured (and smoothed) per energy source.
energy %>%
  filter(
    producer == "Total Electric Power Industry" & state == "US-TOTAL"
  ) %>%
  ggplot(aes(x = year, y = generation, color = source)) +
  geom_point() +
  geom_smooth() +
  labs(title = "US total power generation per year")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same view as the US-wide plot, restricted to state "TX": generation
# against year, one colour and one smoother per energy source.
energy %>%
  filter(
    producer == "Total Electric Power Industry" & state == "TX"
  ) %>%
  ggplot(aes(x = year, y = generation, color = source)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Texas total power generation per year")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Scatter of generation by state, coloured by energy source, for the
# "Total Electric Power Industry" rows; the "US-TOTAL" rows are left out.
energy %>%
  filter(
    producer == "Total Electric Power Industry" & state != "US-TOTAL"
  ) %>%
  ggplot(aes(x = state, y = generation, color = source)) +
  geom_point()

# Keep only the strongly negative readings (generation below -5000) among
# the "Total Electric Power Industry" rows, excluding "US-TOTAL", and show
# which states and sources they belong to.
energy %>%
  filter(
    producer == "Total Electric Power Industry" &
      state != "US-TOTAL" &
      generation < -5000
  ) %>%
  ggplot(aes(x = state, y = generation, color = source)) +
  geom_point() +
  labs(title = "States with negative power generation (<-5000)")

# Texas rows for "Total Electric Power Industry", plotted against the month
# column instead of the year, with one colour and smoother per source.
energy %>%
  filter(
    producer == "Total Electric Power Industry" & state == "TX"
  ) %>%
  ggplot(aes(x = month, y = generation, color = source)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Texas total power generation per month")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Generation per year, one facet (and fill colour) per energy source.
#
# geom_col() stacks every row it receives, so the table is first restricted
# to the industry-wide, nationwide rows. Without this filter the aggregate
# rows ("Total Electric Power Industry", "US-TOTAL") are stacked on top of
# the per-producer and per-state rows and every bar is inflated.
# NOTE(review): the state label is compared case-insensitively in case its
# capitalisation is not uniform across rows -- confirm against the data.
energy %>%
  filter(
    producer == "Total Electric Power Industry",
    toupper(state) == "US-TOTAL"
  ) %>%
  ggplot() +
  geom_col(aes(x = year, y = generation, fill = source)) +
  facet_wrap(vars(source))

The US goal is to install an average of 30 GW of solar capacity per year between now and 2025, and 60 GW per year from 2025 to 2030. Source: https://www.renewable-ei.org/pdfdownload/activities/01_Key_AlejandroMoreno.pdf

# Rows for the whole industry ("Total Electric Power Industry") with the
# nationwide aggregate removed, so it is not drawn as if it were a state.
# NOTE(review): the state label is compared case-insensitively in case its
# capitalisation is not uniform across rows -- confirm against the data.
total_energy <- energy %>%
  filter(
    producer == "Total Electric Power Industry",
    toupper(state) != "US-TOTAL"
  )

# Stacked generation per state, split by energy source.
#
# `total` is the combined generation of the individual sources for each
# year/month/state. It is added with a grouped mutate that skips the "Total"
# source row, rather than halving a sum that includes it and joining the
# summary back onto the same rows. `total` is not mapped to any aesthetic;
# it is only carried along in the plot data.
fig <- total_energy %>%
  group_by(year, month, state) %>%
  mutate(total = sum(generation[source != "Total"])) %>%
  ungroup() %>%
  filter(source != "Total") %>%
  ggplot() +
  geom_col(aes(x = state, y = generation, fill = source))

# Interactive version of the chart; the tooltip shows only the y value.
ggplotly(fig, tooltip = "y")
# Print the "Total Electric Power Industry" rows, without the "US-TOTAL"
# state and the "Total" source, grouped by year and month.
energy %>%
  filter(
    producer == "Total Electric Power Industry" &
      state != "US-TOTAL" &
      source != "Total"
  ) %>%
  group_by(year, month)
## # A tibble: 117,747 × 6
## # Groups:   year, month [257]
##     year month state producer                      source                gener…¹
##    <dbl> <dbl> <chr> <chr>                         <chr>                   <dbl>
##  1  2001     1 AK    Total Electric Power Industry Coal                    46903
##  2  2001     1 AK    Total Electric Power Industry Petroleum               71085
##  3  2001     1 AK    Total Electric Power Industry Natural Gas            367521
##  4  2001     1 AK    Total Electric Power Industry Hydroelectric Conven…  104549
##  5  2001     1 AK    Total Electric Power Industry Wind                       87
##  6  2001     1 AL    Total Electric Power Industry Coal                  6557913
##  7  2001     1 AL    Total Electric Power Industry Petroleum              107497
##  8  2001     1 AL    Total Electric Power Industry Natural Gas            566478
##  9  2001     1 AL    Total Electric Power Industry Other Gases             25283
## 10  2001     1 AL    Total Electric Power Industry Nuclear               2940300
## # … with 117,737 more rows, and abbreviated variable name ¹​generation